# -*- coding: UTF-8 -*-

from urllib import request

import ssl, json

# 执行javaScript的模块 ,需要 pip install pyexecjs
import execjs

# 正则表达式model
import re

# 自己编写的html解析类
from util import Myhtmlparser


# Extract the price-query JavaScript from a Tmall product page (not implemented yet).
def get_price_js_from_cat(content):
    """Placeholder for pulling the price-lookup script out of a Tmall page.

    ``content`` is indexed as ``(status_code, charset, body_bytes)``.
    For a 200 response the body is decoded with the given charset, but
    nothing is extracted from it yet, so the function always returns ``None``.
    """
    if not content or content[0] != 200:
        return None
    # The decode is kept so that a bad charset or body still raises here;
    # the decoded text itself is not used yet.
    content[2].decode(content[1])
    return None


# Parse Tmall price data and return the price for the given code (not implemented yet).
def parse_price_content_from_cat(data, code):
    """Placeholder: both arguments are currently ignored and ``None`` is returned."""
    return None


# Fetch the price of a product page on a supported shop (JD or Tmall).
def get_price_from_url(url):
    """Return the price for the product behind ``url``.

    The platform is detected with ``decide_url_type``: JD pages (type 1)
    go through ``get_price_from_dog``; Tmall pages (type 2) go through
    ``get_price_from_cat`` with no explicit price type. Any other URL
    yields ``None``.
    """
    platform = decide_url_type(url)
    if platform == 2:
        return get_price_from_cat(url, None)
    if platform == 1:
        return get_price_from_dog(url)
    return None


# Fetch the price of a Tmall product page.
def get_price_from_cat(url, price_type):
    """Return the Tmall price for the product page ``url``.

    The item id (``id`` query field) and the SKU id (``skuId`` query field)
    are read from the URL and handed to ``tmall_price`` together with
    ``price_type``.
    """
    page_item_id = get_item_id_from_cat(url)
    sku_code = get_product_code_from_cat(url)
    return tmall_price(sku_code, page_item_id, price_type)


# Fetch the price of a JD product page.
def get_price_from_dog(url):
    """Return the JD price for the product page ``url``.

    The product code is taken from the URL and looked up through
    ``get_price_by_code_from_jd``.
    """
    return get_price_by_code_from_jd(get_product_code_from_dog(url))


# Query the Tmall price of one SKU (skuid: SKU id, itemId: product page id).
def tmall_price(skuid, itemId, price_type):
    """Fetch a Tmall price from the mdskip item-detail endpoint.

    Args:
        skuid: SKU id; used as the key into the ``priceInfo`` mapping.
        itemId: Id of the product page, as a string.
        price_type: ``'yj'`` for the original price, ``'ldj'`` for the first
            entry of ``suggestivePromotionList``, ``'cxj'`` for the first
            entry of ``promotionList``. A falsy value selects the lowest of
            the prices that are present. Any other value falls back to the
            original price.

    Returns:
        The selected price exactly as delivered by the endpoint (no type
        conversion), or ``None`` when an explicitly requested promotion
        price is not present in the response.

    Raises:
        KeyError: if the response lacks the expected structure or ``skuid``.
    """
    url = 'https://mdskip.taobao.com/core/initItemDetail.htm?itemId=' + itemId
    headers = {
        "Referer": "https://detail.tmall.com/item.htm?id={}".format(itemId),
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; )',
    }
    req = request.Request(url, headers=headers)
    # NOTE(security): certificate verification stays disabled, as before, but
    # only for this request instead of being switched off for the whole
    # process by patching the ssl module.
    insecure_context = ssl._create_unverified_context()
    # The context manager closes the HTTP response once the body is read.
    with request.urlopen(req, context=insecure_context) as response:
        html = response.read().decode('gbk')
    result = json.loads(html)
    sku_prices = result["defaultModel"]["itemPriceResultDO"]["priceInfo"][skuid]
    return _select_tmall_price(sku_prices, price_type)


# Return the price of the first promotion stored under ``list_key``, or None if absent.
def _first_promotion_price(sku_prices, list_key):
    promotions = sku_prices.get(list_key) or []
    return promotions[0].get('price') if promotions else None


# Choose the requested price (or the lowest available one) from one SKU's price entry.
def _select_tmall_price(sku_prices, price_type):
    original = sku_prices['price']
    candidates = (
        ('yj', original),
        ('ldj', _first_promotion_price(sku_prices, 'suggestivePromotionList')),
        ('cxj', _first_promotion_price(sku_prices, 'promotionList')),
    )
    if not price_type:
        available = [value for _, value in candidates if value is not None]
        # Compare numerically: the raw values may be strings, which would
        # otherwise be ordered lexicographically ("1000.00" < "99.00").
        return min(available, key=float)
    # Unknown price types fall back to the original price.
    return dict(candidates).get(price_type, original)


# Execute JavaScript; ``param`` holds arguments the script may need (not implemented yet).
def exec_js(js, param):
    """Placeholder: both arguments are currently ignored and ``None`` is returned."""
    return None


# Read the item id of a Tmall product page from its URL.
def get_item_id_from_cat(url):
    """Return the value of the ``id`` query field of ``url`` (``None`` if absent)."""
    item_id = get_param_value_from_url(url, 'id')
    return item_id


# Return the value of one query-string field of a URL.
def get_param_value_from_url(url, param):
    """Look up the query parameter ``param`` in ``url``.

    The field name must match exactly, so asking for ``id`` does not pick
    up ``user_id`` or ``cat_id``. The first occurrence wins. Values are
    percent-decoded by ``urllib.parse.parse_qsl``.

    Args:
        url: The URL to inspect; may lack a query string.
        param: Exact name of the query field.

    Returns:
        The field's value as a string (possibly empty), or ``None`` when the
        URL is empty, has no query string, or does not contain the field.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import parse_qsl, urlsplit

    if not url:
        return None
    # urlsplit separates the query from the path and drops any #fragment.
    query = urlsplit(url).query
    for name, value in parse_qsl(query, keep_blank_values=True):
        if name == param:
            return value
    return None


# Read the product (SKU) code of a Tmall detail page; it is the ``skuId`` URL field.
def get_product_code_from_cat(url):
    """Return the value of the ``skuId`` query field of ``url`` (``None`` if absent)."""
    sku_id = get_param_value_from_url(url, 'skuId')
    return sku_id


# Get the product code for a URL of any supported shop.
def get_product_code(url):
    """Return the product code encoded in ``url``.

    JD URLs (type 1) are handled by ``get_product_code_from_dog`` and Tmall
    URLs (type 2) by ``get_product_code_from_cat``. URLs of any other
    platform yield ``None``.
    """
    extractors = {
        1: get_product_code_from_dog,
        2: get_product_code_from_cat,
    }
    extractor = extractors.get(decide_url_type(url))
    if extractor is None:
        return None
    return extractor(url)


# Extract the JD product code from a product URL.
def get_product_code_from_dog(url):
    """Return the product code of a JD product page URL.

    The code is the last path segment up to its first dot, e.g.
    ``https://item.jd.com/1098426.html`` -> ``'1098426'``. Only the path is
    inspected, so slashes inside the query string or fragment do not
    affect the result.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urlsplit

    path = urlsplit(url).path
    file_name = path.rsplit('/', 1)[-1]
    return file_name.split('.')[0]


# Classify a URL by shop platform: 1 = JD, 2 = Tmall, 3 = anything else.
def decide_url_type(url):
    """Return the platform code for ``url``.

    The URL is searched for the host markers ``//item.jd.com`` (1) and
    ``//detail.tmall.com`` (2), in that order; 3 is returned when neither
    is present.
    """
    host_markers = (
        ("//item.jd.com", 1),
        ("//detail.tmall.com", 2),
    )
    for marker, platform in host_markers:
        if marker in url:
            return platform
    return 3


# Read the content of a URL; the body is returned as raw bytes.
def get_url_content(url):
    """Fetch ``url`` and return ``(status_code, charset, body)``.

    Returns:
        A 3-tuple:
        * the HTTP status code,
        * the lower-cased charset named in the ``Content-Type`` header, or
          ``None`` when the header is missing or names no charset,
        * the response body as bytes for a 200 response, otherwise ``None``.
    """
    # NOTE(security): as before, certificate verification is disabled when
    # the URL contains "https" -- but only for this request, instead of
    # patching the ssl module for the whole process.
    context = ssl._create_unverified_context() if "https" in url else None
    # The context manager closes the response once everything is read.
    with request.urlopen(url, context=context) as res:
        code = res.getcode()
        info = res.info()
        # Only a 200 response has its body read; other statuses yield None
        # rather than leaving the variable unbound.
        html = res.read() if code == 200 else None
    charset = None
    # get_all() returns None when the header is absent.
    content_types = (info.get_all('Content-Type') if info else None) or []
    match = re.search(r'charset=([a-zA-Z0-9_-]+)', ' '.join(content_types), re.I)
    if match:
        charset = match.group(1).lower()
    return (code, charset, html)


# Parse the JD price response and return the price.
def parse_price_content_from_dog(content):
    """Extract the ``p`` field from a JD price response.

    Args:
        content: ``(status_code, charset, body_bytes)`` tuple. The body is
            expected to contain a JSON object, possibly wrapped in other
            text such as a callback; the text from the first ``{`` to the
            first ``}`` is parsed.

    Returns:
        The value stored under ``'p'``, or ``None`` when ``content`` is
        empty or has no body.

    Raises:
        ValueError: if the body contains no parseable JSON object
            (raised by ``json.loads``).
        KeyError: if the parsed object has no ``'p'`` field.
    """
    if not content or not content[2]:
        return None
    # The charset may be None when the server did not announce one;
    # fall back to UTF-8 instead of crashing in decode().
    content_str = content[2].decode(content[1] or 'utf-8')  # bytes -> str
    start_index = content_str.find('{')
    end_index = content_str.find('}')
    price_json = json.loads(content_str[start_index:end_index + 1])
    return price_json['p']


# Look up the JD price for a JD product code.
def get_price_by_code_from_jd(code):
    """Return the JD price for product ``code``.

    The code is prefixed with ``J_`` and sent as ``skuIds`` to the
    ``p.3.cn`` price endpoint; the response is parsed by
    ``parse_price_content_from_dog``.
    """
    price_url = (
        'https://p.3.cn/prices/mgets?callback=result&type=1&area=1&pdtk=&pduid=0&pdpin=&pin=null&pdbp=0&skuIds='
        + ('J_' + code)
        + '&ext=11000000&source=asd3w'
    )
    return parse_price_content_from_dog(get_url_content(price_url))


# Get the values of a given attribute on a given tag in a page.
def handle_html_content(content, tag_name, attr_name, value_type):
    """Return the parser's values for ``tag_name`` / ``attr_name``.

    Args:
        content: ``(status_code, charset, body_bytes)`` tuple, as returned
            by ``get_url_content``.
        tag_name, attr_name, value_type: passed to ``Myhtmlparser.init``.

    Returns:
        Whatever ``Myhtmlparser.get_values()`` returns for a 200 response;
        ``None`` when ``content`` is empty or the status is not 200.
    """
    if not content or content[0] != 200:
        return None
    # NOTE(review): the page body (content[2]) is never handed to the parser
    # here -- confirm whether Myhtmlparser obtains its input elsewhere.
    html_parser = Myhtmlparser()
    html_parser.init(value_type, tag_name, attr_name)
    return html_parser.get_values()


# Fetch the page at ``url`` and save its content to ``path``.
def downloan_page(url, path):
    """Download ``url`` and write the page text to the file ``path``.

    The body is decoded with the charset announced by the server (UTF-8
    when none was announced) and written back in that same encoding. A
    status message is printed in both the success and the failure case.
    """
    page = get_url_content(url)

    # The tuple itself is always truthy, so check status and body explicitly.
    if not page or page[0] != 200 or page[2] is None:
        print("获取url页面内容失败," + url)
        return

    charset = page[1] or 'utf-8'
    # Decode before opening the file so that a decoding error does not
    # leave an empty, truncated file behind.
    text = page[2].decode(charset)
    with open(path, 'w', encoding=charset) as f:
        f.write(text)
    print("页面保存完成！" + path)


# Sample product URLs for manual testing; exactly one assignment is left active.
# url = 'https://item.jd.com/1098426.html'
# url = 'https://detail.tmall.com/item.htm?spm=a220m.1000858.1000725.1.287a107flNb8OL&id=546253216663&skuId=3300964720544&areaId=510100&standard=1&user_id=2616970884&cat_id=2&is_b=1&rn=88b636bf1a7b740c76b2439607b24b21'
url = 'https://detail.tmall.com/item.htm?spm=a220m.1000858.1000725.6.12b37dccOwQJO9&id=557575103895&skuId=3669430334595&user_id=2541050533&cat_id=50069259&is_b=1&rn=cf90cfd6e0f69afafacb7436001867c9'
# url = ' https://detail.tmall.com/item.htm?spm=a1z10.5-b.w4011-16763141439.55.21df2bad1HVKCi&id=561029820393&rn=9b02580c7e442c7df2ceb573563a9973&abbucket=18&skuId=3512909096494'
# url = 'https://item.jd.com/7044363.html?jd_pop=e16dcc66-bec5-48a6-9082-1ab790f1f0d6&abt=0'
# file = 'D:/test/1.html'
# downloan_page(url, file)



# Manual test: fetch and print the price for the sample URL above.
if __name__ == "__main__":
    print(get_price_from_url(url))
